package com.xuyuan.ai.audio;


import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import org.springframework.ai.image.ImageModel;
import org.springframework.ai.image.ImagePrompt;
import org.springframework.ai.image.ImageResponse;
import org.springframework.ai.openai.OpenAiAudioSpeechModel;
import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
import org.springframework.ai.openai.OpenAiAudioTranscriptionModel;
import org.springframework.ai.openai.OpenAiAudioTranscriptionOptions;
import org.springframework.ai.openai.api.OpenAiAudioApi;
import org.springframework.ai.openai.audio.speech.SpeechPrompt;
import org.springframework.ai.openai.audio.speech.SpeechResponse;
import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMetadata;
import org.springframework.ai.qianfan.QianFanImageOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

/**
 * REST endpoints for the OpenAI audio models: speech-to-text transcription and
 * text-to-speech synthesis.
 *
 * <p>TODO: no usable audio model has been found yet; these endpoints have not been
 * connected directly to the models provided by OpenAI.
 *
 * @author xuyuan
 */
@RestController
public class AudioController {

    /** Transcription (speech-to-text) model used by {@link #audio1(MultipartFile)}. */
    @Autowired
    OpenAiAudioTranscriptionModel openAiTranscriptionModel;

    /** Speech (text-to-speech) model used by {@link #audio2()}. */
    @Autowired
    OpenAiAudioSpeechModel openAiAudioSpeechModel;

    /**
     * Transcribes an uploaded audio file to text.
     *
     * <p>The request is sent with language {@code "en"}, a fixed hint prompt,
     * temperature {@code 0} and the VTT response format.
     *
     * @param audio the multipart part named {@code "audio"} holding the recording to transcribe
     * @return the output of the transcription result
     */
    @PostMapping("/ai/audio-to-text")
    public String audio1(@RequestPart("audio") MultipartFile audio) {
        OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
                .language("en")
                .prompt("Ask not this, but ask that")
                .temperature(0f)
                .responseFormat(OpenAiAudioApi.TranscriptResponseFormat.VTT)
                .build();

        AudioTranscriptionPrompt transcriptionRequest =
                new AudioTranscriptionPrompt(audio.getResource(), transcriptionOptions);
        AudioTranscriptionResponse response = openAiTranscriptionModel.call(transcriptionRequest);
        return response.getResult().getOutput();
    }

    /**
     * Synthesizes a fixed example sentence to speech.
     *
     * <p>The request asks for the MP3 response format at speed {@code 1.0} using the
     * {@code TTS_1} model.
     *
     * @return the audio bytes produced by the speech model
     */
    @GetMapping("/ai/text-to-audio")
    public byte[] audio2() {
        var speechOptions = OpenAiAudioSpeechOptions.builder()
                .responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
                .speed(1.0f)
                .model(OpenAiAudioApi.TtsModel.TTS_1.value)
                .build();

        var speechPrompt = new SpeechPrompt("Hello, this is a text-to-speech example.", speechOptions);
        SpeechResponse response = openAiAudioSpeechModel.call(speechPrompt);
        return response.getResult().getOutput();
    }
}